{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6617c9a3",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "6617c9a3",
    "outputId": "1f0b0afe-45a3-4050-ca46-e12ba9a888a4"
   },
   "outputs": [],
   "source": [
    "# Install evidently only when it cannot be imported in this kernel.\n",
    "try:\n",
    "    import evidently\n",
    "except ImportError:\n",
    "    # Catch ImportError only, so interrupts and unrelated failures are not swallowed.\n",
    "    # %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "    # NOTE(review): this installs the unpinned main branch; consider pinning a release\n",
    "    # known to provide the `evidently.report` / `ColumnMapping` imports used below.\n",
    "    %pip install git+https://github.com/evidentlyai/evidently.git"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12dd086f",
   "metadata": {
    "id": "12dd086f"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import requests\n",
    "import zipfile\n",
    "import io\n",
    "\n",
    "from datetime import datetime, time\n",
    "from sklearn import datasets, ensemble\n",
    "\n",
    "from evidently import ColumnMapping\n",
    "from evidently.test_suite import TestSuite\n",
    "from evidently.report import Report\n",
    "from evidently.metrics import DataDriftTable\n",
    "from evidently.tests import TestShareOfDriftedColumns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "SG6AAsK2q6zR",
   "metadata": {
    "id": "SG6AAsK2q6zR"
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "# Silence every warning for the rest of the session to keep the demo output compact.\n",
    "# simplefilter('ignore') and filterwarnings('ignore') register the same catch-all\n",
    "# filter, so a single call is sufficient.\n",
    "warnings.simplefilter('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c3531990",
   "metadata": {
    "id": "c3531990"
   },
   "source": [
    "## Prepare Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efWJJAvThmff",
   "metadata": {
    "id": "efWJJAvThmff"
   },
   "outputs": [],
   "source": [
    "# Download the UCI Bike Sharing archive and load the hourly table, indexed by date.\n",
    "# TLS certificate verification is left enabled (the requests default) so the payload\n",
    "# cannot be silently tampered with in transit; the timeout keeps Run All from hanging.\n",
    "response = requests.get(\n",
    "    \"https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip\",\n",
    "    timeout=60,\n",
    ")\n",
    "response.raise_for_status()  # surface HTTP errors here rather than as a BadZipFile below\n",
    "content = response.content\n",
    "\n",
    "# Both the archive and the member handle are closed when the block exits.\n",
    "with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open(\"hour.csv\") as hourly_csv:\n",
    "    raw_data = pd.read_csv(hourly_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7vSIXrL8rm0k",
   "metadata": {
    "id": "7vSIXrL8rm0k"
   },
   "outputs": [],
   "source": [
    "# Move every row's timestamp from its calendar date to the hour given by the `hr` column.\n",
    "# Vectorised replacement for a row-wise apply. normalize() resets the time of day\n",
    "# first, so re-running this cell does not add the hours a second time.\n",
    "raw_data.index = raw_data.index.normalize() + pd.to_timedelta(raw_data['hr'].to_numpy(), unit='h')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pOuDAm72rRo4",
   "metadata": {
    "id": "pOuDAm72rRo4"
   },
   "outputs": [],
   "source": [
    "# Column roles: `cnt` is what the regressor is fitted on; model output is stored\n",
    "# under the `prediction` column.\n",
    "target = 'cnt'\n",
    "prediction = 'prediction'\n",
    "\n",
    "# Model inputs, grouped the way they are later declared to the column mapping.\n",
    "numerical_features = [\n",
    "    'temp',\n",
    "    'atemp',\n",
    "    'hum',\n",
    "    'windspeed',\n",
    "    'hr',\n",
    "    'weekday',\n",
    "]\n",
    "categorical_features = [\n",
    "    'season',\n",
    "    'holiday',\n",
    "    'workingday',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "m_0e2d5jrS34",
   "metadata": {
    "id": "m_0e2d5jrS34"
   },
   "outputs": [],
   "source": [
    "# Reference window: 1-28 January 2011. Current window: 29 January - 28 February 2011.\n",
    "# .copy() makes both frames independent of raw_data, so the prediction column added\n",
    "# later is written to a real frame instead of a slice (no SettingWithCopyWarning).\n",
    "reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].copy()\n",
    "current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00'].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aZIblTePrr4c",
   "metadata": {
    "id": "aZIblTePrr4c"
   },
   "source": [
    "## Regression Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "zPa6UX6erV8L",
   "metadata": {
    "id": "zPa6UX6erV8L"
   },
   "outputs": [],
   "source": [
    "# Random forest with 50 trees and a fixed random_state.\n",
    "regressor = ensemble.RandomForestRegressor(\n",
    "    n_estimators=50,\n",
    "    random_state=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "wn3SDMeJrZI6",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 75
    },
    "id": "wn3SDMeJrZI6",
    "outputId": "b1053aac-dd92-48af-ad86-55312b882f7a"
   },
   "outputs": [],
   "source": [
    "# Train on the reference window only; numerical columns come first, then categorical.\n",
    "train_features = reference[numerical_features + categorical_features]\n",
    "regressor.fit(train_features, reference[target])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "nlAPLRQrrxtI",
   "metadata": {
    "id": "nlAPLRQrrxtI"
   },
   "outputs": [],
   "source": [
    "# Score both windows using the same column order that was passed to fit().\n",
    "feature_columns = numerical_features + categorical_features\n",
    "ref_prediction = regressor.predict(reference[feature_columns])\n",
    "current_prediction = regressor.predict(current[feature_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3qkKRBdcr1NF",
   "metadata": {
    "id": "3qkKRBdcr1NF"
   },
   "outputs": [],
   "source": [
    "# Store model output under the column name held in `prediction`, the same name that\n",
    "# is handed to the column mapping, so the two cannot drift apart.\n",
    "# assign() returns new frames instead of writing a column into a slice of raw_data,\n",
    "# which also keeps this cell safe to re-run.\n",
    "reference = reference.assign(**{prediction: ref_prediction})\n",
    "current = current.assign(**{prediction: current_prediction})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5vBnaeSdr-Q4",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "5vBnaeSdr-Q4",
    "outputId": "dd4732ce-1f77-4369-952a-06a8781da4cd"
   },
   "outputs": [],
   "source": [
    "# Importances of the fitted forest, one value per input column in the order\n",
    "# numerical_features + categorical_features (the column order passed to fit()).\n",
    "regressor.feature_importances_"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "JipcXBmXr5DT",
   "metadata": {
    "id": "JipcXBmXr5DT"
   },
   "source": [
    "## Drift analysis with importances: default importances estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9MhcoUKQ2AQp",
   "metadata": {
    "id": "9MhcoUKQ2AQp"
   },
   "outputs": [],
   "source": [
    "# Tell evidently which columns hold the target, the model output and the features.\n",
    "column_mapping = ColumnMapping(\n",
    "    target=target,\n",
    "    prediction=prediction,\n",
    "    numerical_features=numerical_features,\n",
    "    categorical_features=categorical_features,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "PSmjajjJsJms",
   "metadata": {
    "id": "PSmjajjJsJms"
   },
   "outputs": [],
   "source": [
    "# Compare the reference window with the first ten days of the current window.\n",
    "# No importances are passed in here, so the report relies on evidently's own\n",
    "# estimation (see the section heading).\n",
    "current_window = current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00']\n",
    "\n",
    "report = Report(metrics=[DataDriftTable(feature_importance=True)])\n",
    "report.run(\n",
    "    reference_data=reference,\n",
    "    current_data=current_window,\n",
    "    column_mapping=column_mapping,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "IEamMeO6su72",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "IEamMeO6su72",
    "outputId": "270729ac-265c-4c1e-c36e-57e0737df644"
   },
   "outputs": [],
   "source": [
    "# Display the drift report produced by report.run() in the previous cell.\n",
    "report.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "Yi_nSkaK2xTr",
   "metadata": {
    "id": "Yi_nSkaK2xTr"
   },
   "source": [
    "## Drift analysis with importances: custom importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "Tm40lE-Z2p91",
   "metadata": {
    "id": "Tm40lE-Z2p91"
   },
   "outputs": [],
   "source": [
    "# Column roles for this section: target, model output and the two feature groups.\n",
    "column_mapping = ColumnMapping(\n",
    "    target=target,\n",
    "    prediction=prediction,\n",
    "    numerical_features=numerical_features,\n",
    "    categorical_features=categorical_features,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "nVeajUqw2wMd",
   "metadata": {
    "id": "nVeajUqw2wMd"
   },
   "outputs": [],
   "source": [
    "# Pair each feature name with the importance the fitted forest assigned to it; the\n",
    "# name order matches the column order that was passed to regressor.fit().\n",
    "feature_importance = dict(\n",
    "    zip(numerical_features + categorical_features, regressor.feature_importances_)\n",
    ")\n",
    "\n",
    "# Same comparison as in the previous section, but with the model's own importances\n",
    "# supplied through additional_data.\n",
    "report = Report(metrics=[DataDriftTable(feature_importance=True)])\n",
    "report.run(\n",
    "    reference_data=reference,\n",
    "    current_data=current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'],\n",
    "    column_mapping=column_mapping,\n",
    "    additional_data={'current_feature_importance': feature_importance},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "vi4q2l9V3fdJ",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "vi4q2l9V3fdJ",
    "outputId": "e86d0d9b-c2df-47f7-a6cd-76baf800dc90"
   },
   "outputs": [],
   "source": [
    "# Display the drift report produced by report.run() in the previous cell.\n",
    "report.show()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
